pacman::p_load(tidyverse, data.table, broom, readxl)
rm(list=ls())
`%!in%` = Negate(`%in%`)
################################################################################

###### Spatial units

spatial_id_a = 'county_id'
spatial_id_b = 'amc_id'

###### Reference commodity for zetat shocks

ref_commodity = 'pasture'
df_yield_obs = fread(paste0('../../data/agronomy/clean/stockingrates_argentinabrazil_',spatial_id_b,'.csv.gz'))[,c('year','spatial_id','cattle_per_ha')]

###### Within-nest parameters

df_zetat_ratio = fread(paste0("inputs/parameters_supply_within_zetat_ratio.csv"))
df_within = fread(paste0("inputs/parameters_supply_within_tl.csv"))
df_within$frontier = 0
frontier_list = c('NORTE','NORDESTE','NOA','NEA','PATAGONIA','CUYO')
df_within[region %in% frontier_list,]$frontier = 1
theta_lambda_core = max(df_within$theta_lambda)
theta_lambda_frontier = min(df_within$theta_lambda)
theta_lambda_rescaled_core = max(df_within$theta_lambda_rescaled)
theta_lambda_rescaled_frontier = min(df_within$theta_lambda_rescaled)

###### Lambda guess

spec_within = fread('inputs/spec_scale_within.csv')
lambda_guess_core = 0.28 
lambda_guess_frontier =  0.05 
rescale_lambda = unique(spec_within$rescale_lambda)
lambda_rescaled_guess_core = lambda_guess_core + rescale_lambda
lambda_rescaled_guess_frontier = lambda_guess_frontier + rescale_lambda
theta_guess_core = theta_lambda_core*lambda_guess_core
theta_guess_frontier = theta_lambda_frontier*lambda_guess_frontier
theta_rescaled_guess_core = theta_lambda_rescaled_core*lambda_rescaled_guess_core
theta_rescaled_guess_frontier = theta_lambda_rescaled_frontier*lambda_rescaled_guess_frontier

###### Factors

df_factors = setDT(read_excel('../../data/factors/raw/factor_shares.xlsx'))[,c('commodity','land_share')]
setnames(df_factors, old='commodity', new='landuse')
current_spec = data.table(rbind(c(theta_lambda_core, theta_lambda_frontier, lambda_guess_core, lambda_guess_frontier)))
setnames(current_spec, new=c('theta_lambda_core', 'theta_lambda_frontier', 'lambda_guess_core','lambda_guess_frontier'))
write.csv(current_spec, paste0('inputs/spec_data_across.csv'), row.names = FALSE)

###### Baseline variables

for(country in c('argentina','brazil') ){
  
  df_l = fread(paste0('../../data/landuse/clean/landuse_',country,'_county.csv.gz'))
  df_u = fread(paste0('../../data/landuse/clean/geographicunits_',country,'.csv.gz'))
  df_yc = fread(paste0('../../data/agronomy/clean/faogaez_',country,'.csv.gz'))
  df_yp = fread(paste0('../../data/agronomy/clean/pastureproductivity_argentinabrazil.csv.gz'))[unit=='head per hectare',]
  setnames(df_yp, old = c('int_mean'),  new = c('yield'))
  setnames(df_yc, old = c('int_mean'),  new = c('yield'))
  
  if (country=='argentina'){
    
    ### Spatial unit crosswalk
    df_u = df_u[, list(county_id, state_id, region)]
    
    ### Land
    df = merge(df_l, df_u, by='county_id')
    df = df[,c('forest','pasture') := list(forest_natural+forest_planted, pasture+forage_seasonal+forage_permanent)][, c('farmland') := list(soybean+maize+wheat+sunflower+pasture)]
    setnames(df, old = spatial_id_a,  new = c('spatial_id'))
    df_land_a = df[,  list(year, region, state_id, spatial_id, forest, farmland)][, lapply(.SD, sum, na.rm=TRUE), by=list(year, region, state_id, spatial_id)]
    
    ### Land shares
    df_land_C = df[,  list(year, region, state_id, spatial_id, forest, farmland)][, lapply(.SD, sum, na.rm=TRUE), by=list(year, region, state_id, spatial_id)]
    df_land_c = df[,  list(year, region, state_id, spatial_id, pasture, soybean, maize, wheat, sunflower)][, lapply(.SD, sum, na.rm=TRUE), by=list(year, region, state_id, spatial_id)][, unit:='hectares']
    
    # Across nest
    df = df_land_C[, c('share') := list(farmland/(farmland+forest))]
    df = df[,c('year','spatial_id','share')]
    df = melt(df, id.vars = c('year','spatial_id'),  value.name ='shareC')
    df_sharesC = df[,c('year','spatial_id','shareC')]
    
    # Within-nest
    df_land_c$soybean=as.numeric(df_land_c$soybean)
    df_land_c$maize=as.numeric(df_land_c$maize)
    df_land_c$wheat=as.numeric(df_land_c$wheat)
    df_land_c$sunflower=as.numeric(df_land_c$sunflower)
    dfc = melt(df_land_c[,c('year','spatial_id','pasture','soybean', 'maize', 'wheat','sunflower')], id.vars = c('year','spatial_id'), variable.name = "commodity", value.name ='area')
    dfC = melt(df_land_C[,c('year','spatial_id','farmland')], id.vars = c('year','spatial_id'), variable.name = "typeC", value.name ='area')
    df2 = merge(dfc,dfC, by=c('year','spatial_id'))[, sharecC:=area.x/area.y]
    df_sharescC = df2[,c('year','spatial_id','commodity','sharecC')]
    df_landshare = merge(df_sharescC,df_sharesC, by=c('year','spatial_id'), all.x=T)

    ### FAO-GAEZ yields
    yield_list = c('soybean','maize','wheat','sunflower')
    df_yp = df_yp[country=='ARGENTINA', list(county_id, crop, yield, unit)][, lapply(.SD, mean, na.rm=TRUE), by=list(county_id, crop, unit)]
    df_yc = df_yc[crop %in% yield_list, list(county_id, crop, yield)][, lapply(.SD, mean, na.rm=TRUE), by=list(county_id, crop)][, unit:='tonne per hectare']
    df = merge(na.omit(rbind(df_yp,df_yc), cols="yield"), df_u, by='county_id')
    setnames(df, old = c(spatial_id_a, 'crop'),  new = c('spatial_id','commodity'))
    df_yield_a = df[, list(region, state_id, spatial_id, commodity, yield, unit)][, lapply(.SD, mean, na.rm=TRUE), by=list(region, state_id, spatial_id, commodity, unit)]
    
    ### Farm-gate commodity prices
    df = fread(paste0('../../data/prices/clean/farmgate_crops_argentina_',spatial_id_a,'.csv.gz'))
    df=df[year!=2018,]
    df[year==2017,]$year=2018
    df_pc = df[commodity %in% yield_list, list(year, region, state_id, spatial_id, commodity, price)][, lapply(.SD, mean, na.rm=TRUE), by=list(year, region, state_id, spatial_id, commodity)][, unit:='current USD per tonne']
    df = fread(paste0('../../data/prices/clean/farmgate_cattle_argentina_',spatial_id_a,'.csv.gz'))[, commodity:='pasture']
    setnames(df, old = c('value'),  new = c('price'))
    df_pp = df[establishment_type %in% c('all establishments') & value_type %in% c('price_per_head'), list(year, region, state_id, spatial_id, commodity, price)][, lapply(.SD, mean, na.rm=TRUE), by=list(year, region, state_id, spatial_id, commodity)][, unit:='current USD per head']
    df_prices_a = rbind(df_pc, df_pp)[is.finite(price),]
    
    ### Reference zetat shock
    df = merge(df_landshare,df_yield_a[,c('spatial_id','commodity','yield')], by=c('spatial_id','commodity'))
    df = merge(df, df_within[,c('spatial_id','theta_lambda','theta_lambda_rescaled','frontier')], by='spatial_id')
    df = merge(df, df_yield_obs[,c('year','spatial_id','cattle_per_ha')], by=c('year','spatial_id'))
    df = merge(df, df_prices_a[commodity==ref_commodity,c('year','spatial_id','price')], by=c('year','spatial_id'))
    df = merge(df, df_factors, by.x=c('commodity'), by.y=('landuse'))
    df$observed_component = ( df$land_share/( df$price^((1/df$land_share)-1) ) )*df$cattle_per_ha
    
    df$theta = theta_guess_core
    df[frontier==1,]$theta = theta_guess_frontier
    df$theta_rescaled = theta_rescaled_guess_core
    df[frontier==1,]$theta_rescaled = theta_rescaled_guess_frontier
    df = df[, c('zetat_ref','zetat_ref_rescaled') := list( (observed_component/yield)*((sharecC)^(1/theta_lambda))*((shareC)^(1/theta)) , (observed_component/yield)*((sharecC)^(1/theta_lambda_rescaled))*((shareC)^(1/theta_rescaled)) )]
    df_zetat_ref = df[commodity==ref_commodity,][!is.na(zetat_ref),]
    setnames(df_zetat_ref, old='commodity', new='landuse')

    ### Levels of zetat shock
    
    #Soybean zetat
    df1 = df_zetat_ratio[landuse==ref_commodity,][,c('year','spatial_id','landuse','zetat_ratio','zetat_ratio_rescaled')]
    df2 = df_zetat_ref[,c('year','spatial_id','zetat_ref','zetat_ref_rescaled')]
    df = merge(df1,df2, by=c('year','spatial_id'))
    df$zetat= df$zetat_ref*df$zetat_ratio
    df$zetat_rescaled= df$zetat_ref_rescaled*df$zetat_ratio_rescaled
    df$landuse = 'soybean'
    df_zetat_soybean = df[,c('year','spatial_id','landuse','zetat','zetat_rescaled')]
    
    #Other zetat
    df = merge(df_zetat_ratio[, c('year','spatial_id','landuse','zetat_ratio','zetat_ratio_rescaled')],df_zetat_soybean[,-c('landuse')], by=c('year','spatial_id'))
    df$zetat = 1/(df$zetat_ratio/df$zetat)
    df$zetat_rescaled = 1/(df$zetat_ratio_rescaled/df$zetat_rescaled)
    df_zetat_other = df[,-c('zetat_ratio','zetat_ratio_rescaled')]
    
    #All zetat
    df_zetat = rbind(df_zetat_soybean,df_zetat_other)
    setnames(df_zetat, old='landuse', new='commodity')
    df_zetat_ar = df_zetat

    ### Payoffs per ha
    df = merge(df_prices_a, df_yield_a, by=c('region', 'state_id', 'spatial_id', 'commodity'))
    df = merge(df, df_zetat, by=c('year', 'spatial_id', 'commodity'), all.x=T)
    setnames(df, old = c('commodity'),  new = c('landuse'))
    df = merge(df, df_factors, by='landuse')
    df=df[, c('payoff_per_ha'):=list((price^(1/land_share))*yield*zetat_rescaled)]
    df1 = df[, list(year, region, state_id, spatial_id, landuse, payoff_per_ha)]
    df = fread("../../data/prices/clean/farmgate_cattle_argentina_county_id.csv.gz")[value_type=='payoff_per_ha', list(spatial_id, year, value)]
    df = merge(df, df_u, by.x='spatial_id', by.y='county_id')
    setnames(df, old=c('value'), new=c('payoff_per_ha'))
    df$landuse = 'pasture_value'
    df2 = df
    df_payoff_a = rbind(df1[, list(year, region, state_id, spatial_id, landuse, payoff_per_ha)],df2[, list(year, region, state_id, spatial_id, landuse, payoff_per_ha)])
    
    ### Nest payoff
    df = merge(df_within, df_payoff_a,  by=c('spatial_id','region'))
    df[, c('payoff_nest'):= list(payoff_per_ha^theta_lambda)]
    nest_list = c('soybean','maize','wheat','sunflower','pasture','pasture_value')
    df_payoff_nest_a = df[landuse %in% nest_list, list(year, region, state_id, spatial_id, payoff_nest)][, lapply(.SD, sum, na.rm=TRUE), by=list(year, region, state_id, spatial_id)]

    ### Final variables for OLS
    df_ols_a = merge(df_land_a, df_payoff_nest_a, by=c('year','region','state_id','spatial_id'))
    df_ols_a$country='ARGENTINA'    
    
  } else {
    
    ### Spatial unit crosswalk
    df_u = df_u[, list(county_id, amc_id, microregion_id, mesoregion_id, state_id, region)]
    
    ### Land
    df = merge(df_l, df_u, by='county_id')
    df = df[, c('pasture','forest') := list(pasture_natural+pasture_planted, forest_natural+forest_planted)][, c('farmland') := list(soybean+maize+wheat+rice+sugarcane+pasture)]
    setnames(df, old = c(spatial_id_b),  new = c('spatial_id'))
    df_land_b = df[,  list(year, region, state_id, spatial_id, forest, farmland)][, lapply(.SD, sum, na.rm=TRUE), by=list(year, region, state_id, spatial_id)]

    ### Land shares
    df_land_C = df[,  list(year, region, state_id, spatial_id, forest, farmland)][, lapply(.SD, sum, na.rm=TRUE), by=list(year, region, state_id, spatial_id)]
    df_land_c = df[,  list(year, region, state_id, spatial_id, pasture, soybean, maize, wheat, rice, sugarcane)][, lapply(.SD, sum, na.rm=TRUE), by=list(year, region, state_id, spatial_id)][, unit:='hectares']

    # Across nest
    df = df_land_C[, c('share') := list(farmland/(farmland+forest))]
    df = df[,c('year','spatial_id','share')]
    df = melt(df, id.vars = c('year','spatial_id'),  value.name ='shareC')
    df_sharesC = df[,c('year','spatial_id','shareC')]
    
    # Within-nest
    dfc = melt(df_land_c[,c('year','spatial_id','pasture','soybean', 'maize', 'wheat','rice', 'sugarcane')], id.vars = c('year','spatial_id'), variable.name = "commodity", value.name ='area')
    dfC = melt(df_land_C[,c('year','spatial_id','farmland')], id.vars = c('year','spatial_id'), variable.name = "typeC", value.name ='area')
    df2 = merge(dfc,dfC, by=c('year','spatial_id'))[, sharecC:=area.x/area.y]
    df_sharescC = df2[,c('year','spatial_id','commodity','sharecC')]
    df_landshare = merge(df_sharescC,df_sharesC, by=c('year','spatial_id'), all.x=T)

    ### FAO-GAEZ yields
    yield_list = c('soybean','maize','wheat','rice','sugarcane')
    df_yp = df_yp[country=='BRAZIL', list(county_id, crop, yield, unit)][, lapply(.SD, mean, na.rm=TRUE), by=list(county_id, crop, unit)]
    df_yc = df_yc[crop %in% yield_list, list(county_id, crop, yield)][, lapply(.SD, mean, na.rm=TRUE), by=list(county_id, crop)][, unit:='tonne per hectare']
    df = merge(na.omit(rbind(df_yp,df_yc), cols="yield"), df_u, by='county_id')
    setnames(df, old = c(spatial_id_b, 'crop'),  new = c('spatial_id','commodity'))
    df_yield_b = df[, list(region, state_id, spatial_id, commodity, yield, unit)][, lapply(.SD, mean, na.rm=TRUE), by=list(region, state_id, spatial_id, commodity, unit)]
    
    ### Farm-gate commodity prices
    df = fread(paste0('../../data/prices/clean/farmgate_crops_brazil_',spatial_id_b,'.csv.gz'))
    df_pc = df[commodity %in% yield_list, list(year, region, state_id, spatial_id, commodity, price)][, lapply(.SD, mean, na.rm=TRUE), by=list(year, region, state_id, spatial_id, commodity)][, unit:='current USD per tonne']
    df = fread(paste0('../../data/prices/clean/farmgate_cattle_brazil_',spatial_id_b,'.csv.gz'))[, commodity:='pasture']
    setnames(df, old = c('value'),  new = c('price'))
    df_pp = df[establishment_type %in% c('cattle establishments') & value_type %in% c('breeders_p','calves_p'), list(year, region, state_id, spatial_id, commodity, price)][, lapply(.SD, mean, na.rm=TRUE), by=list(year, region, state_id, spatial_id, commodity)][, unit:='current USD per head']
    df_prices_b = rbind(df_pc, df_pp)[is.finite(price),]
    
    ### Reference zetat shock
    df = merge(df_landshare,df_yield_b[,c('spatial_id','commodity','yield')], by=c('spatial_id','commodity'))
    df = merge(df, df_within[,c('spatial_id','theta_lambda','theta_lambda_rescaled','frontier')], by='spatial_id')
    df = merge(df, df_yield_obs[,c('year','spatial_id','cattle_per_ha')], by=c('year','spatial_id'))
    df = merge(df, df_prices_b[commodity==ref_commodity,c('year','spatial_id','price')], by=c('year','spatial_id'))
    df = merge(df, df_factors, by.x=c('commodity'), by.y=('landuse'))
    df$observed_component = ( df$land_share/( df$price^((1/df$land_share)-1) ) )*df$cattle_per_ha
    
    df$theta = theta_guess_core
    df[frontier==1,]$theta = theta_guess_frontier
    df$theta_rescaled = theta_rescaled_guess_core
    df[frontier==1,]$theta_rescaled = theta_rescaled_guess_frontier
    df = df[, c('zetat_ref','zetat_ref_rescaled') := list( (observed_component/yield)*((sharecC)^(1/theta_lambda))*((shareC)^(1/theta)) , (observed_component/yield)*((sharecC)^(1/theta_lambda_rescaled))*((shareC)^(1/theta_rescaled)) )]
    df_zetat_ref = df[commodity==ref_commodity,][!is.na(zetat_ref),]
    setnames(df_zetat_ref, old='commodity', new='landuse')
    
    ### Levels of zetat shock
    
    #Soybean zetat
    df1 = df_zetat_ratio[landuse==ref_commodity,][,c('year','spatial_id','landuse','zetat_ratio','zetat_ratio_rescaled')]
    df2 = df_zetat_ref[,c('year','spatial_id','zetat_ref','zetat_ref_rescaled')]
    df = merge(df1,df2, by=c('year','spatial_id'))
    df$zetat= df$zetat_ref*df$zetat_ratio
    df$zetat_rescaled= df$zetat_ref_rescaled*df$zetat_ratio_rescaled
    df$landuse = 'soybean'
    df_zetat_soybean = df[,c('year','spatial_id','landuse','zetat','zetat_rescaled')]
    
    #Other zetat
    df = merge(df_zetat_ratio[, c('year','spatial_id','landuse','zetat_ratio','zetat_ratio_rescaled')],df_zetat_soybean[,-c('landuse')], by=c('year','spatial_id'))
    df$zetat = 1/(df$zetat_ratio/df$zetat)
    df$zetat_rescaled = 1/(df$zetat_ratio_rescaled/df$zetat_rescaled)
    df_zetat_other = df[,-c('zetat_ratio','zetat_ratio_rescaled')]

    #All zetat
    df_zetat = rbind(df_zetat_soybean,df_zetat_other)
    setnames(df_zetat, old='landuse', new='commodity')
    df_zetat_br = df_zetat
    
    ### Payoffs per ha
    df = merge(df_prices_b, df_yield_b, by=c('region', 'state_id', 'spatial_id', 'commodity'))
    df = merge(df, df_zetat, by=c('year', 'spatial_id', 'commodity'), all.x=T)
    setnames(df, old = c('commodity'),  new = c('landuse'))
    df = merge(df, df_factors, by='landuse')
    df=df[, c('payoff_per_ha'):=list((price^(1/land_share))*yield*zetat_rescaled)]
    df1 = df[, list(year, region, state_id, spatial_id, landuse, payoff_per_ha)]
    df = fread(paste0('../../data/prices/clean/pastureland_values_brazil.csv.gz'))
    df = merge(df, df_u, by='county_id')
    setnames(df, old = c(spatial_id_b,'price_per_ha_current_USD'),  new = c('spatial_id','payoff_per_ha'))
    df$landuse = 'pasture_value'
    df2 = df
    df_payoff_b = rbind(df1[, list(year, region, state_id, spatial_id, landuse, payoff_per_ha)],df2[, list(year, region, state_id, spatial_id, landuse, payoff_per_ha)])
    
    ### Nest payoff
    df = merge(df_within, df_payoff_b,  by=c('spatial_id','region'))
    df[, c('payoff_nest'):= list(payoff_per_ha^theta_lambda)]
    nest_list =  c('soybean','maize','wheat','rice','sugarcane','pasture_value')
    df_payoff_nest_b = df[landuse %in% nest_list, list(year, region, state_id, spatial_id, payoff_nest)][, lapply(.SD, sum, na.rm=TRUE), by=list(year, region, state_id, spatial_id)]
    
    ### Final variables for OLS
    df_ols_b = merge(df_land_b, df_payoff_nest_b, by=c('year','region', 'state_id','spatial_id'))
    df_ols_b$country='BRAZIL'
    
  }
  
}
df_ols = rbind(df_ols_a,df_ols_b)
df_zetat = rbind(df_zetat_ar,df_zetat_br)
write.csv(df_zetat, paste0("inputs/parameters_supply_across_zetat_level.csv"), row.names = FALSE)

###### Exposure shares

id_type_a = c('county_known')
id_type_b = c('county_known')
year_max = 2018
for (country in c('argentina','brazil') ){
  
  if (country=='argentina'){
    
    df = fread(paste0('../../data/agribusiness/clean/soybean_',country,'.csv.gz'))[id_type %in% id_type_a & state %!in% c('UNKNOWN','IMPORTS + STOCK') & destination_bloc != 'UNKNOWN',]
    setnames(df, old = c(spatial_id_a),  new = c('spatial_id'))
    dfx = df[, list(year, country, region, state_id, spatial_id,destination_bloc,equivalent_tonnes, fob_usd, commodity)][, lapply(.SD, sum, na.rm=TRUE), by=list(year, country, region, state_id, spatial_id, destination_bloc, commodity)]
    dfy = df[, list(year, country, region, state_id, spatial_id,equivalent_tonnes,fob_usd, commodity)][, lapply(.SD, sum, na.rm=TRUE), by=list(year, country, region, state_id, spatial_id, commodity)]
    dfxy = merge(dfx,dfy, by=c('year','country','region','state_id','spatial_id','commodity'))[, c('exposure_share') := list(equivalent_tonnes.x/equivalent_tonnes.y)][, list(year, country, region, state_id, spatial_id, destination_bloc, exposure_share, commodity)]
    df_a = dfxy[year<=year_max,][, c("year") := NULL][, lapply(.SD, mean, na.rm=TRUE),  by=list(country, region, state_id, spatial_id, destination_bloc, commodity)]
    
  }else{
    
    n_c=1
    
    for (commodity in c('soybean','maize','beef')){
      
      df = fread(paste0('../../data/agribusiness/clean/',commodity,'_',country,'.csv.gz'))[id_type %in% id_type_b & state !='UNKNOWN STATE' & destination != 'BRAZIL' & destination_bloc != 'UNKNOWN',] 
      setnames(df, old = c(spatial_id_b),  new = c('spatial_id'))
      dfx = df[, list(year, country, region, state_id, spatial_id,destination_bloc,equivalent_tonnes, fob_usd, commodity)][, lapply(.SD, sum, na.rm=TRUE), by=list(year, country, region, state_id, spatial_id, destination_bloc, commodity)]
      dfy = df[, list(year, country, region, state_id, spatial_id,equivalent_tonnes,fob_usd, commodity)][, lapply(.SD, sum, na.rm=TRUE), by=list(year, country, region, state_id, spatial_id, commodity)]
      dfxy = merge(dfx,dfy, by=c('year','country','region','state_id','spatial_id','commodity'))[, c('exposure_share') := list(equivalent_tonnes.x/equivalent_tonnes.y)][, list(year, country, region, state_id, spatial_id, destination_bloc, exposure_share, commodity)]
      df_b_c = dfxy[year<=year_max,][, c("year") := NULL][, lapply(.SD, mean, na.rm=TRUE),  by=list(country, region, state_id, spatial_id, destination_bloc,commodity)]
      
      if(n_c==1){df_b = df_b_c}else{df_b = rbind(df_b,df_b_c)}
      
      n_c=n_c+1
      
    } 
  }
}
df_iv_shares = rbind(df_a,df_b)
df_iv_shares$landuse = df_iv_shares$commodity
df_iv_shares[commodity=='beef',]$landuse='pasture'
df_iv_shares[,c("commodity"):=c(NULL)]

###### Temporal variation

df = fread(paste0('../../data/trade/clean/imports_from_world_iv.csv.gz'))
df$landuse = df$item
df[landuse=='MAIZE']$landuse = 'maize'
df[landuse=='OIL, SUNFLOWER']$landuse = 'sunflower'
df[landuse=='RICE, MILLED']$landuse = 'rice'
df[landuse=='SOYBEANS']$landuse = 'soybean'
df[landuse=='WHEAT']$landuse = 'wheat'
df[landuse=='SUGAR RAW CENTRIFUGAL']$landuse = 'sugarcane'
df[landuse=='MEAT, CATTLE, BONELESS (BEEF & VEAL)']$landuse = 'pasture'
df[unit=='TONNES']$unit = 'tonnes'
df[unit=='USD']$unit = 'usd'
df_c = fread(paste0('../../data/trade/raw/destinations_trase_faostat_crosswalk.csv'))[, list(destination_bloc, destination)]
df = merge(df_c, df, by=c('destination'))
product_type_landuse=c('maize','pasture','sunflower','rice','soybean','sugarcane','wheat')
df_iv_t = df[landuse %in% product_type_landuse, list(year, destination_bloc, landuse, unit, residual_imports_from_world)][, lapply(.SD, sum, na.rm=TRUE), by=list(year, destination_bloc, landuse, unit)]

df = df_iv_t
df$year_agg = 0
df[year %in% c(2000,2001,2002,2003,2004)]$year_agg = 2002
df[year %in% c(2006,2007,2008,2009,2010)]$year_agg = 2008
df[year %in% c(1993,1994,1995,1996,1997)]$year_agg = 1995
df[year %in% c(2003,2004,2006,2007,2008)]$year_agg = 2006
df[year %in% c(2015,2016,2017,2018,2019)]$year_agg = 2017
setnames(df, old=c('year','year_agg'), new=c('year_agg','year'))
df[, year_agg:=NULL]
df_iv_t_agg =  df[year %in% c(1995,2002,2008,2006,2017), lapply(.SD, mean, na.rm=TRUE), by=list(year, destination_bloc, landuse, unit)]
df_iv_t_agg2 = df_iv_t_agg[year==2017,]
df_iv_t_agg2$year = 2018
df_iv_t_agg = rbind(df_iv_t_agg,df_iv_t_agg2)

###### Interaction

df = dcast(df_iv_t_agg,  year + destination_bloc + landuse ~ unit, value.var = "residual_imports_from_world")
setnames(df, old=c('usd'), new=c('demand_shift'))
df_iv_t_wide = df
df = merge(df_iv_shares[, list(country, spatial_id, destination_bloc, exposure_share, landuse)], df_iv_t_wide, by=c('landuse','destination_bloc'), allow.cartesian=TRUE)
df[, c('iv') := list(exposure_share*demand_shift)]
df_iv = df[, list(year, country, spatial_id, iv)][, lapply(.SD, sum, na.rm=TRUE), by=list(year, country, spatial_id)]

###### Final dataset

df= merge(df_ols, df_iv,  by=c('year','country','spatial_id'), all.x=T)
write.csv(df, gzfile(paste0("inputs/data_supply_across.csv.gz")), row.names = FALSE)

